# !pip install scikeras  # notebook shell command — run in Colab/Jupyter; invalid syntax in a plain Python script
import tensorflow as tf
import numpy as np
import random
import os
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from tensorflow import keras
from tensorflow.keras import models, layers, optimizers, regularizers
from tensorflow.keras.utils import to_categorical, plot_model
from keras.preprocessing.image import ImageDataGenerator
from imblearn.over_sampling import RandomOverSampler
from sklearn.model_selection import train_test_split, GridSearchCV
from scikeras.wrappers import KerasClassifier
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, f1_score, classification_report
from prettytable import PrettyTable
# helper function for loading the dataframe, oversampling and reshaping images, and assigning the data onto training labels
def load_fer_dataset(df):
    """Load the FER-2013 frame, oversample minority emotions, and split train/test.

    Parameters:
        df: DataFrame with a 'pixels' column (space-separated grayscale values)
            and an 'emotion' column (integer class labels).
    Returns:
        x_train, x_test, y_train, y_test from an 80/20 split (random_state=45).
    """
    width, height = 48, 48
    # Fix (clarity): the original bound the *features* to `x_train` and the
    # *labels* to `x_test`, which made the rest of the function misleading.
    features, labels = df['pixels'], df['emotion']
    # oversampling the dataset to fix the class-imbalance problem
    oversampler = RandomOverSampler(sampling_strategy='auto')
    features, labels = oversampler.fit_resample(features.values.reshape(-1, 1), labels)
    features = pd.Series(features.flatten())
    # parse "p0 p1 ... p2303" strings into floats and scale to [0, 1]
    features = np.array(list(map(str.split, features)), 'float32') / 255
    features = features.reshape(-1, width, height, 1)
    labels = np.array(labels).reshape(-1, 1)
    return train_test_split(features, labels, test_size=0.2, random_state=45)  # 20% test data
def preprocess_labels(y_train, y_test, y_val, num_classes):
    """One-hot encode the training, test and validation label arrays."""
    encoded = tuple(to_categorical(labels, num_classes) for labels in (y_train, y_test, y_val))
    return encoded
# plotting 7 samples from each emotion classes
def plot_all_emotions():
    """Plot a grid with one row per emotion class and 7 sample faces per row.

    Reads the module-level `df_explore` and `decoded_emotions` globals.
    """
    emotion_codes = sorted(df_explore.emotion.unique())
    num_emotions = len(emotion_codes)
    fig = plt.figure(1, (12, 12))
    fig.suptitle("Images representing each emotion", fontsize=20, weight='bold')
    img_count = 0  # running subplot index
    for num_emotion in emotion_codes:
        class_pixels = df_explore[df_explore.emotion == num_emotion].pixels
        for col in range(num_emotions):
            # Fix: sample index restarts at 0 for each class. The original used
            # the ever-growing img_count as the iloc position, so class k read
            # samples 7k..7k+6 and could IndexError on a small class.
            pixel = np.array(class_pixels.iloc[col].split(' ')).reshape(48, 48).astype('float32')
            img_count += 1
            axis = plt.subplot(num_emotions, num_emotions, img_count)
            axis.imshow(pixel, cmap='gray')
            axis.axis('off')
            axis.set_title(decoded_emotions[num_emotion], fontsize=12)
    return plt.show()
# plotting one sample belonging to a specific emotion class
def plot_one_emotion(emotion, location):
    """Show one 48x48 sample of class `emotion` at positional index `location`."""
    raw_pixels = df_explore[df_explore.emotion == emotion].pixels.iloc[location]
    image = np.array(raw_pixels.split(' ')).reshape(48, 48).astype('float32')
    plt.title(decoded_emotions[emotion], fontsize=12, weight='bold')
    return plt.imshow(image, cmap='gray')
# plotting all samples of images that have been augmented
def plot_augmentation_samples(train_datagen):
    """Display a 3x10 grid of augmented training images and save the figure.

    Reads the module-level `x_train`; each cell shows the first image of a
    freshly drawn augmented batch.
    """
    fig = plt.figure(figsize=(25, 10))
    fig.suptitle("Data Augmentation Samples", fontsize=20, weight='bold')
    rows = 3
    columns = 10
    iterator = train_datagen.flow(x_train, batch_size=64)
    for i in range(rows * columns):
        fig.add_subplot(rows, columns, i + 1)
        plt.grid(False)
        # Fix: use the builtin next(); the Python-2-style .next() method is
        # gone from newer Keras data iterators
        batch = next(iterator)
        plt.imshow(np.squeeze(batch[0]), cmap=plt.cm.gray)
        plt.xticks([])
        plt.yticks([])
    save_figure("Data Augmentation Samples", tight_layout=False)
    plt.show()
# helper function for training the model i.e. running for epochs
def train_model(model, epoch, batch):
    """Fit `model` on the module-level x_train/y_train, validating on x_val/y_val.

    Parameters:
        model: compiled Keras model.
        epoch: maximum number of epochs.
        batch: batch size.
    Returns:
        The Keras History object from model.fit.
    """
    callbacks_list = [
        EarlyStopping(  # stop when validation accuracy has not improved for `patience` epochs
            monitor='val_accuracy',
            patience=3,  # epochs with no improvement before training is terminated (best weights restored)
            verbose=1,
            restore_best_weights=True
        ),
        ModelCheckpoint(  # save the model to disk whenever validation loss hits a new best
            filepath='best-model.h5',
            verbose=1,
            monitor='val_loss',
            save_best_only=True,
        ),
        ReduceLROnPlateau(  # multiply the LR by sqrt(0.1) (~0.316) when val_loss plateaus
            monitor='val_loss',
            factor=np.sqrt(0.1),
            patience=3,
            verbose=1,
            min_delta=0.0001
        )
    ]
    return model.fit(x_train,
                     y_train,
                     epochs = epoch,
                     batch_size = batch,
                     callbacks=callbacks_list,
                     validation_data = (x_val, y_val))
# helper function for retrieving a data augmentation generator
def get_augmented_generator(train_datagen, batch):
    """Return an iterator of augmented (x, y) batches over the global training set."""
    generator = train_datagen.flow(x_train, y_train, batch_size=batch)
    return generator
# helper function for training the model with the data augmentation generator
def train_augmented_model(model, train_datagen, epoch, batch):
    """Fit `model` on augmented batches from `train_datagen`, validating on x_val/y_val.

    Parameters:
        model: compiled Keras model.
        train_datagen: configured ImageDataGenerator.
        epoch: maximum number of epochs.
        batch: batch size (also used to derive steps per epoch).
    Returns:
        The Keras History object from model.fit.
    """
    callbacks_list = [
        EarlyStopping(  # stop when validation accuracy has not improved for `patience` epochs
            monitor='val_accuracy',
            patience=3,  # epochs with no improvement before training is terminated (best weights restored)
            verbose=1,
            restore_best_weights=True
        ),
        ModelCheckpoint(  # save the model to disk whenever validation loss hits a new best
            filepath='best-model.h5',
            verbose=1,
            monitor='val_loss',
            save_best_only=True,
        ),
        ReduceLROnPlateau(  # multiply the LR by 0.2 when val_loss plateaus
            monitor='val_loss',
            factor=0.2,
            patience=3,
            verbose=1,
            min_delta=0.0001
        ),
    ]
    return model.fit(get_augmented_generator(train_datagen, batch),
                     epochs = epoch,
                     steps_per_epoch = x_train.shape[0] // batch,
                     validation_data = (x_val, y_val),
                     callbacks=callbacks_list,
                     validation_steps = len(x_val) // batch)
# retrieves all history keys of the model
def get_history_keys(history):
    """Unpack a Keras History into (history dict, loss, val_loss, accuracy, val_accuracy)."""
    record = history.history
    return (record,
            record["loss"],
            record["val_loss"],
            record["accuracy"],
            record["val_accuracy"])
# plots both the model loss and accuracy
def plot_model_history(history, name=""):
    """Plot training/validation loss and accuracy curves, then print val extremes."""
    _, loss, val_loss, acc, val_acc = get_history_keys(history)
    epoch_range = range(1, len(loss) + 1)
    dotted, solid = 'bo', 'b'  # blue dots for training, solid blue for validation
    fig, axes = plt.subplots(1, 2)
    fig.suptitle(name, fontsize=14)
    fig.set_size_inches(14, 6)
    # left panel: loss curves; right panel: accuracy curves
    axes[0].plot(epoch_range, loss, dotted, label='Training loss')
    axes[0].plot(epoch_range, val_loss, solid, label='Validation loss')
    axes[1].plot(epoch_range, acc, dotted, label='Training acc')
    axes[1].plot(epoch_range, val_acc, solid, label='Validation acc')
    plt.setp(axes[0], xlabel='Epochs', ylabel='Loss')
    plt.setp(axes[1], xlabel='Epochs', ylabel='Accuracy')
    axes[0].set_title('Training and validation loss')
    axes[0].legend()
    axes[1].set_title('Training and validation acc')
    axes[1].legend()
    # report the validation extremes across all epochs
    print(f"\nMin validation loss: {str(min(val_loss))} \nMax validation loss: {str(max(val_loss))} \nMin validation acc: {str(min(val_acc))} \nMax validation acc: {str(max(val_acc))}")
# retrieves all history keys from the two models
def get_comparison_history_keys(history1, history2):
    """Unpack two Keras Histories into (dict1, dict2, val_loss1, val_loss2, val_acc1, val_acc2)."""
    record1, record2 = history1.history, history2.history
    return (record1,
            record2,
            record1["val_loss"],
            record2["val_loss"],
            record1["val_accuracy"],
            record2["val_accuracy"])
def compare_model_history(history1, history2, name="", key1="", key2=""):
    """Overlay the validation loss/accuracy of two models and print their best values."""
    _, _, val_loss_a, val_loss_b, val_acc_a, val_acc_b = get_comparison_history_keys(history1, history2)
    epoch_range = range(1, len(val_loss_a) + 1)
    fig, axes = plt.subplots(1, 2)
    fig.suptitle(name, fontsize=14)
    fig.set_size_inches(14, 6)
    # (axis, series, colour, legend label) for each of the four curves
    curve_specs = [
        (axes[0], val_loss_a, 'g', key1 + ' Loss'),
        (axes[0], val_loss_b, 'b', key2 + ' Loss'),
        (axes[1], val_acc_a, 'g', key1 + ' Accuracy'),
        (axes[1], val_acc_b, 'b', key2 + ' Accuracy'),
    ]
    for ax, series, colour, label in curve_specs:
        ax.plot(epoch_range, series, colour, label=label)
    plt.setp(axes[0], xlabel='Epochs', ylabel='Loss')
    plt.setp(axes[1], xlabel='Epochs', ylabel='Accuracy')
    axes[0].set_title('Validation Loss')
    axes[0].legend()
    axes[1].set_title('Validation Accuracy')
    axes[1].legend()
    # best (lowest) loss and best (highest) accuracy per model
    print(f"\n{key1} validation loss: {str(min(val_loss_a))} \n{key2} validation loss: {str(min(val_loss_b))}")
    print(f"\n{key1} validation accuracy: {str(max(val_acc_a))} \n{key2} validation accuracy: {str(max(val_acc_b))}")
# helper function to plot the grid search results
def plot_grid_search_results(results):
    """Print a summary of a fitted GridSearchCV: best params, per-candidate scores,
    and the best estimator's accuracy on the module-level x_test/y_test.

    Parameters:
        results: a fitted sklearn GridSearchCV instance.
    """
    print(f"\nBest score = {'%.2f' % results.best_score_} using {results.best_params_}\n")
    mean_score = results.cv_results_['mean_test_score']
    standard_deviation = results.cv_results_['std_test_score']
    parameters = results.cv_results_['params']
    # Fix: use the `results` argument — the original read the global
    # `grid_searcher_result`, silently ignoring whatever was passed in
    optimised_model = results.best_estimator_
    accuracy = optimised_model.score(x_test, y_test)  # x_test/y_test are module globals
    for mean, stdev, param in zip(mean_score, standard_deviation, parameters):
        print('mean test accuracy +/- std = {:.4f} +/- {:.4f} with: {}'.format(mean, stdev, param))
    print("\nAccuracy achieved on the best model")
    print("{:.2f}%".format(accuracy * 100))
# helper function for saving the visualisations and images onto the directory
def save_figure(figure_name, tight_layout=True, fig_extension="png", resolution=300):
    """Save the current matplotlib figure to ./<figure_name>.<fig_extension>.

    Parameters:
        figure_name: file name without extension.
        tight_layout: apply plt.tight_layout() before saving.
        fig_extension: image format (default "png").
        resolution: DPI for the saved image.
    """
    path = os.path.join(".", figure_name + "." + fig_extension)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(path, format=fig_extension, dpi=resolution)
    # Fix: announce success only after the file has actually been written
    # (the original printed before savefig, so a failed save still claimed success)
    print(figure_name, "has been saved")
# --- Load the FER-2013 CSV (columns: emotion, pixels, Usage) from Google Drive ---
df = pd.read_csv('/content/drive/MyDrive/Full-FER-2013-Dataset/fer2013.csv')
# NOTE(review): the bare expressions below only render output in a notebook;
# run as a plain script they compute and discard their results
df.head()
df.tail()
df.info()
df.describe()
df['emotion'].value_counts()
df_explore = df.copy()  # explore a copy so the raw frame stays untouched
df_explore.head()
# Mapping from FER-2013 integer emotion codes to human-readable labels
decoded_emotions = {0: 'Angry',
                    1: 'Disgusted',
                    2: 'Scared',
                    3: 'Happy',
                    4: 'Sad',
                    5: 'Surprised',  # fix: was misspelled 'Suprised' (appears in plot titles)
                    6: 'Neutral'}
# --- Class-balance exploration: counts, NaN checks, and a histogram per emotion ---
print("Distribution of samples per emotion:\n{}".format(df_explore.groupby("emotion").size()))
# confirm there are no missing values in any column
df_explore['emotion'].isna().sum()
df_explore['pixels'].isna().sum()
df_explore['Usage'].isna().sum()
df_explore.hist(bins=30, figsize=(10, 5), legend=True, color=(0.5, 0.8, 0.2, 1))
plt.title('Visualisation of samples per emotion')
plt.xlabel('emotion')
plt.ylabel('count')
plt.xticks(np.array(list(decoded_emotions.keys())), decoded_emotions.values())
# Fix (perf): compute value_counts() once instead of twice per loop iteration
emotion_counts = df_explore["emotion"].value_counts()
for i in range(len(emotion_counts)):
    # annotate each bar with its sample count, slightly above the bar top
    plt.annotate(emotion_counts[i], (i, emotion_counts[i] + 50))
plt.tight_layout()
plt.show()
# Show the sample grid for every emotion class
plot_all_emotions()
# plot_one_emotion(emotion=1, location=1) # specific location
emotion = 3 # select an emotion key (3 = Happy)
plot_one_emotion(emotion=emotion, location=random.randint(1, 100)) # random location
# Target split — Training: 70 / Testing: 20 / Validation: 10
# (actual is 72/20/8: the validation set is 10% of the 80% training portion)
x_train, x_test, y_train, y_test = load_fer_dataset(df) # splitting the data into train/test
x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=0.1, random_state=45) # creating a validation set
num_classes = len(decoded_emotions)  # 7 emotion classes
y_train, y_test, y_val = preprocess_labels(y_train, y_test, y_val, num_classes=num_classes) # preprocessing labels
# size sanity checks (bare expressions — only display in a notebook)
len(df)
len(x_train) + len(x_test) + len(x_val) # after oversampling, larger than len(df)
len(x_train), len(x_test), len(x_val)
def model_1():
    """Baseline CNN: five conv layers (no regularisation), one hidden dense layer.

    Input shape comes from the module-level x_train; output is 7-way softmax.
    """
    net = models.Sequential()
    # (filters, padding, max-pool after this conv?) for each of the five conv layers
    conv_specs = [
        (32, "valid", False),
        (64, "same", True),
        (64, "valid", False),
        (128, "same", True),
        (128, "valid", True),
    ]
    for index, (filters, pad, pool_after) in enumerate(conv_specs):
        extra = {"input_shape": x_train.shape[1:]} if index == 0 else {}
        net.add(layers.Conv2D(filters, kernel_size=3, strides=1, padding=pad, activation='relu', **extra))
        if pool_after:
            net.add(layers.MaxPooling2D((2, 2)))
    net.add(layers.Flatten())
    net.add(layers.Dense(256, activation='relu'))
    net.add(layers.Dense(7, activation='softmax'))
    net.compile(optimizer=optimizers.Adam(learning_rate=0.0001), loss='categorical_crossentropy', metrics=['accuracy'])
    return net
# Build, visualise and train model 1, then evaluate on the held-out test set
model = model_1()
plot_model(model, to_file="model.png", show_shapes=True, show_layer_names=True)
model.summary()
history1 = train_model(model, 30, 64)  # up to 30 epochs, batch size 64
model.evaluate(x_test, y_test)
def model_2():
    """CNN with four conv blocks (Conv + BatchNorm + MaxPool + Dropout) and two
    regularised dense layers; 7-way softmax output."""
    net = models.Sequential()
    # (filters, kernel size) per conv block
    for index, (filters, kernel) in enumerate([(64, 3), (128, 5), (512, 3), (512, 3)]):
        extra = {"input_shape": x_train.shape[1:]} if index == 0 else {}
        net.add(layers.Conv2D(filters, kernel_size=kernel, strides=1, padding='same', activation='relu', **extra))
        net.add(layers.BatchNormalization())
        net.add(layers.MaxPooling2D((2, 2)))
        net.add(layers.Dropout(0.25))
    net.add(layers.Flatten())
    # two fully connected blocks, each with batch-norm and dropout
    for units in (256, 512):
        net.add(layers.Dense(units, activation='relu'))
        net.add(layers.BatchNormalization())
        net.add(layers.Dropout(0.25))
    net.add(layers.Dense(7, activation='softmax'))
    net.compile(optimizer=optimizers.Adam(learning_rate=0.0001), loss='categorical_crossentropy', metrics=['accuracy'])
    return net
# Build, visualise and train model 2, then evaluate on the held-out test set
model = model_2()
plot_model(model, to_file="model.png", show_shapes=True, show_layer_names=True)
model.summary()
history2 = train_model(model, 30, 64)  # up to 30 epochs, batch size 64
model.evaluate(x_test, y_test)
def model_3():
    """Same architecture as model_2 but with LeakyReLU activations throughout
    (a fresh LeakyReLU layer instance per activation, as in the original)."""
    net = models.Sequential()
    leaky = tf.keras.layers.LeakyReLU  # called per layer to get a new instance each time
    for index, (filters, kernel) in enumerate([(64, 3), (128, 5), (512, 3), (512, 3)]):
        extra = {"input_shape": x_train.shape[1:]} if index == 0 else {}
        net.add(layers.Conv2D(filters, kernel_size=kernel, strides=1, padding='same', activation=leaky(), **extra))
        net.add(layers.BatchNormalization())
        net.add(layers.MaxPooling2D((2, 2)))
        net.add(layers.Dropout(0.25))
    net.add(layers.Flatten())
    for units in (256, 512):
        net.add(layers.Dense(units, activation=leaky()))
        net.add(layers.BatchNormalization())
        net.add(layers.Dropout(0.25))
    net.add(layers.Dense(7, activation='softmax'))
    net.compile(optimizer=optimizers.Adam(0.0001), loss='categorical_crossentropy', metrics=['accuracy'])
    return net
# Build, visualise and train model 3 (LeakyReLU variant), then evaluate on test data
model = model_3()
plot_model(model, to_file="model.png", show_shapes=True, show_layer_names=True)
model.summary()
history3 = train_model(model, 30, 64)  # up to 30 epochs, batch size 64
model.evaluate(x_test, y_test)
# Augmentation pipeline for training images
# NOTE(review): shear/zoom of 0.4 and 35-degree rotations are fairly aggressive
# for 48x48 faces — confirm these ranges were tuned deliberately
train_datagen = ImageDataGenerator(
    rotation_range=35,        # random rotations up to 35 degrees
    width_shift_range=0.2,    # horizontal shift up to 20% of width
    height_shift_range=0.2,   # vertical shift up to 20% of height
    shear_range=0.4,
    zoom_range=0.4,
    fill_mode='nearest'       # fill pixels exposed by transforms with nearest values
)
plot_augmentation_samples(train_datagen)
# Retrain model 2 on augmented batches: up to 80 epochs, batch size 128
model = model_2()
history4 = train_augmented_model(model, train_datagen, 80, 128)
model.evaluate(x_test, y_test)
# Compare the learning curves of the four candidate models
plot_model_history(history1, name="CNN Model 1")
plot_model_history(history2, name="CNN Model 2")
plot_model_history(history3, name="CNN Model 2 with Leaky ReLu")
plot_model_history(history4, name="CNN Model 2 with Data Augmentation")
# After analysing all the results, Model 2 has proven to be the most promising. Therefore, it will be shortlisted for fine-tuning.
def create_model(pool_type='max', conv_activation='relu', dropout_rate=0.25):
    """Model-2-style CNN parameterised for grid search.

    Parameters:
        pool_type: 'max' or 'average' pooling after each conv block
            (any other value adds no pooling layer, as in the original).
        conv_activation: activation used by every conv and dense layer.
        dropout_rate: dropout after each block; 0 disables dropout entirely.
    """
    model = models.Sequential()

    def _downsample():
        # pooling layer selected by pool_type, then optional dropout
        if pool_type == 'max':
            model.add(layers.MaxPooling2D((2, 2)))
        elif pool_type == 'average':
            model.add(layers.AveragePooling2D((2, 2)))
        if dropout_rate != 0:
            model.add(layers.Dropout(dropout_rate))

    # (filters, kernel size) per conv block
    for index, (filters, kernel) in enumerate([(64, 3), (128, 5), (256, 3), (256, 3)]):
        extra = {"input_shape": x_train.shape[1:]} if index == 0 else {}
        model.add(layers.Conv2D(filters, kernel_size=kernel, strides=1, padding='same', activation=conv_activation, **extra))
        model.add(layers.BatchNormalization())
        _downsample()
    model.add(layers.Flatten())
    for units in (256, 256):
        model.add(layers.Dense(units, activation=conv_activation))
        model.add(layers.BatchNormalization())
        if dropout_rate != 0:
            model.add(layers.Dropout(dropout_rate))
    model.add(layers.Dense(7, activation='softmax'))
    model.compile(optimizer=optimizers.Adam(0.0001), loss='categorical_crossentropy', metrics=['accuracy'])
    return model
# wrapping the existing model around KerasClassifier to use it with scikit-learn
model = KerasClassifier(build_fn=create_model, pool_type='max', conv_activation='relu', dropout_rate=0.25, verbose=1)
# specifying the hyperparameters to be tuned during the grid search
param_grid = {
    'pool_type': ['max', 'average'],
    # NOTE(review): the single LeakyReLU() instance here is shared by every
    # conv/dense layer of a candidate model — confirm that is intended
    'conv_activation': ['relu', tf.keras.layers.LeakyReLU(), 'tanh'],
    'epochs': [30],
}
# fitting the model and evaluating the results (3-fold CV, all CPU cores)
grid_searcher = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=3)
grid_searcher_result = grid_searcher.fit(x_train, y_train)
plot_grid_search_results(grid_searcher_result)
def create_model(kernel_size=3, dropout_rate=0.25):
    """CNN with average pooling, parameterised for grid search over kernel size
    and dropout rate.

    Parameters:
        kernel_size: kernel used by conv blocks 1, 3 and 4 (block 2 stays at 5).
        dropout_rate: dropout after each block; 0 disables dropout entirely.
    """
    model = models.Sequential()
    # (filters, kernel) per conv block — block 2 keeps its fixed 5x5 kernel
    for index, (filters, kernel) in enumerate([(64, kernel_size), (128, 5), (256, kernel_size), (256, kernel_size)]):
        extra = {"input_shape": x_train.shape[1:]} if index == 0 else {}
        model.add(layers.Conv2D(filters, kernel_size=kernel, strides=1, padding='same', activation='relu', **extra))
        model.add(layers.BatchNormalization())
        model.add(layers.AveragePooling2D((2, 2)))
        if dropout_rate != 0:
            model.add(layers.Dropout(dropout_rate))
    model.add(layers.Flatten())
    for units in (256, 256):
        model.add(layers.Dense(units, activation='relu'))
        model.add(layers.BatchNormalization())
        if dropout_rate != 0:
            model.add(layers.Dropout(dropout_rate))
    model.add(layers.Dense(7, activation='softmax'))
    model.compile(optimizer=optimizers.Adam(0.0001), loss='categorical_crossentropy', metrics=['accuracy'])
    return model
# wrapping the existing model around KerasClassifier to use it with scikit-learn
model = KerasClassifier(build_fn=create_model, kernel_size=3, dropout_rate=0.25, verbose=1)
# specifying the hyperparameters to be tuned during the grid search
# (3 kernel sizes x 4 dropout rates x 2 batch sizes = 24 candidates, 3 folds each)
param_grid = {
    'kernel_size': [3, 5, 7],
    'dropout_rate': [0.2, 0.25, 0.3, 0.35],
    'epochs': [30],
    'batch_size': [64, 128],
}
# fitting the model and evaluating the results (3-fold CV, all CPU cores)
grid_searcher = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=3)
grid_searcher_result = grid_searcher.fit(x_train, y_train)
plot_grid_search_results(grid_searcher_result)
def create_model(padding='same', dropout_rate=0.2):
    """CNN with 5x5 kernels and average pooling, parameterised for grid search
    over the conv padding mode.

    Parameters:
        padding: padding mode passed to every Conv2D layer.
        dropout_rate: dropout after each block; 0 disables dropout entirely.
    """
    model = models.Sequential()
    # four conv blocks, all 5x5 kernels; explicit 48x48x1 input on the first
    for index, filters in enumerate([64, 128, 256, 256]):
        extra = {"input_shape": (48, 48, 1)} if index == 0 else {}
        model.add(layers.Conv2D(filters, kernel_size=5, strides=1, padding=padding, activation='relu', **extra))
        model.add(layers.BatchNormalization())
        model.add(layers.AveragePooling2D((2, 2)))
        if dropout_rate != 0:
            model.add(layers.Dropout(dropout_rate))
    model.add(layers.Flatten())
    for units in (256, 256):
        model.add(layers.Dense(units, activation='relu'))
        model.add(layers.BatchNormalization())
        if dropout_rate != 0:
            model.add(layers.Dropout(dropout_rate))
    model.add(layers.Dense(7, activation='softmax'))
    model.compile(optimizer=optimizers.Adam(0.0001), loss='categorical_crossentropy', metrics=['accuracy'])
    return model
# wrapping the existing model around KerasClassifier to use it with scikit-learn
model = KerasClassifier(build_fn=create_model, padding='same', dropout_rate=0.2, verbose=1)
# specifying the hyperparameters to be tuned during the grid search
param_grid = {
    # Fix: Conv2D only accepts 'valid' or 'same'; 'causal' is Conv1D-only and
    # raised a ValueError in every fold that tried it
    'padding': ['same', 'valid'],
    'epochs': [30],
    'batch_size': [128],
}
# fitting the model and evaluating the results (3-fold CV, all CPU cores)
grid_searcher = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=3)
grid_searcher_result = grid_searcher.fit(x_train, y_train)
plot_grid_search_results(grid_searcher_result)
def create_model(optimizer=optimizers.Adam, learning_rate=0.0001, dropout_rate=0.2):
    """CNN with average pooling, parameterised for grid search over the
    optimizer class and learning rate.

    Parameters:
        optimizer: an optimizer *class* (not instance), called with learning_rate.
        learning_rate: positional learning rate for the optimizer constructor.
        dropout_rate: dropout after each block; 0 disables dropout entirely.
    """
    model = models.Sequential()
    # (filters, kernel) per conv block
    for index, (filters, kernel) in enumerate([(64, 3), (128, 5), (256, 3), (256, 3)]):
        extra = {"input_shape": x_train.shape[1:]} if index == 0 else {}
        model.add(layers.Conv2D(filters, kernel_size=kernel, strides=1, padding='same', activation='relu', **extra))
        model.add(layers.BatchNormalization())
        model.add(layers.AveragePooling2D((2, 2)))
        if dropout_rate != 0:
            model.add(layers.Dropout(dropout_rate))
    model.add(layers.Flatten())
    for units in (256, 256):
        model.add(layers.Dense(units, activation='relu'))
        model.add(layers.BatchNormalization())
        if dropout_rate != 0:
            model.add(layers.Dropout(dropout_rate))
    model.add(layers.Dense(7, activation='softmax'))
    model.compile(optimizer=optimizer(learning_rate), loss='categorical_crossentropy', metrics=['accuracy'])
    return model
# wrapping the existing model around KerasClassifier to use it with scikit-learn
model = KerasClassifier(build_fn=create_model, optimizer=optimizers.Adam, learning_rate=0.0001, dropout_rate=0.2, verbose=1)
# specifying the hyperparameters to be tuned during the grid search
# (5 optimizer classes x 4 learning rates x 2 batch sizes = 40 candidates)
param_grid = {
    'optimizer': [optimizers.RMSprop, optimizers.Adagrad, optimizers.Adam, optimizers.Adamax, optimizers.Nadam],
    'learning_rate': [0.0001, 0.00001, 0.0005, 0.00005],
    'epochs': [30],
    'batch_size': [64, 128],
}
# fitting the model and evaluating the results (3-fold CV, all CPU cores)
grid_searcher = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=3)
grid_searcher_result = grid_searcher.fit(x_train, y_train)
plot_grid_search_results(grid_searcher_result)
def combined_model_1():
    """Model 2's architecture with average pooling substituted for max pooling;
    Adam optimizer at lr 1e-4."""
    net = models.Sequential()
    for index, (filters, kernel) in enumerate([(64, 3), (128, 5), (512, 3), (512, 3)]):
        extra = {"input_shape": x_train.shape[1:]} if index == 0 else {}
        net.add(layers.Conv2D(filters, kernel_size=kernel, strides=1, padding='same', activation='relu', **extra))
        net.add(layers.BatchNormalization())
        net.add(layers.AveragePooling2D((2, 2)))
        net.add(layers.Dropout(0.25))
    net.add(layers.Flatten())
    for units in (256, 512):
        net.add(layers.Dense(units, activation='relu'))
        net.add(layers.BatchNormalization())
        net.add(layers.Dropout(0.25))
    net.add(layers.Dense(7, activation='softmax'))
    net.compile(optimizer=optimizers.Adam(0.0001), loss='categorical_crossentropy', metrics=['accuracy'])
    return net
# Train combined model 1 and compare its validation curves against model 2
model = combined_model_1()
plot_model(model, to_file="model.png", show_shapes=True, show_layer_names=True)
model.summary()
history5 = train_model(model, 30, 64)  # up to 30 epochs, batch size 64
model.evaluate(x_test, y_test)
plot_model_history(history5, name="Combined Model 1")
compare_model_history(history2, history5, name="Model 2 vs Combined Model 1", key1="Model 2", key2="Combined Model 1")
def combined_model_2():
    """Model 2 variant: all conv kernels 5x5, dropout lowered to 0.2;
    max pooling and Adam at lr 1e-4 as in model 2."""
    net = models.Sequential()
    for index, filters in enumerate([64, 128, 512, 512]):
        extra = {"input_shape": x_train.shape[1:]} if index == 0 else {}
        net.add(layers.Conv2D(filters, kernel_size=5, strides=1, padding='same', activation='relu', **extra))
        net.add(layers.BatchNormalization())
        net.add(layers.MaxPooling2D((2, 2)))
        net.add(layers.Dropout(0.2))
    net.add(layers.Flatten())
    for units in (256, 512):
        net.add(layers.Dense(units, activation='relu'))
        net.add(layers.BatchNormalization())
        net.add(layers.Dropout(0.2))
    net.add(layers.Dense(7, activation='softmax'))
    net.compile(optimizer=optimizers.Adam(0.0001), loss='categorical_crossentropy', metrics=['accuracy'])
    return net
# Train combined model 2 and compare its validation curves against model 2
model = combined_model_2()
plot_model(model, to_file="model.png", show_shapes=True, show_layer_names=True)
model.summary()
history6 = train_model(model, 30, 64)  # up to 30 epochs, batch size 64
model.evaluate(x_test, y_test)
plot_model_history(history6, name="Combined Model 2")
compare_model_history(history2, history6, name="Model 2 vs Combined Model 2", key1="Model 2", key2="Combined Model 2")
def combined_model_3():
    """Model 2's exact architecture, re-compiled with Adamax at lr 5e-4
    (the best optimizer/learning-rate combination from the grid search)."""
    net = models.Sequential()
    for index, (filters, kernel) in enumerate([(64, 3), (128, 5), (512, 3), (512, 3)]):
        extra = {"input_shape": x_train.shape[1:]} if index == 0 else {}
        net.add(layers.Conv2D(filters, kernel_size=kernel, strides=1, padding='same', activation='relu', **extra))
        net.add(layers.BatchNormalization())
        net.add(layers.MaxPooling2D((2, 2)))
        net.add(layers.Dropout(0.25))
    net.add(layers.Flatten())
    for units in (256, 512):
        net.add(layers.Dense(units, activation='relu'))
        net.add(layers.BatchNormalization())
        net.add(layers.Dropout(0.25))
    net.add(layers.Dense(7, activation='softmax'))
    net.compile(optimizer=optimizers.Adamax(0.0005), loss='categorical_crossentropy', metrics=['accuracy'])
    return net
# Train combined model 3 and compare its validation curves against model 2
model = combined_model_3()
plot_model(model, to_file="model.png", show_shapes=True, show_layer_names=True)
model.summary()
history7 = train_model(model, 30, 64)  # up to 30 epochs, batch size 64
model.evaluate(x_test, y_test)
plot_model_history(history7, name="Combined Model 3")
compare_model_history(history2, history7, name="Model 2 vs Combined Model 3", key1="Model 2", key2="Combined Model 3")
def final_combined_model():
    """Build and compile the final combined CNN.

    Same layout as combined_model_3 but with average pooling instead of
    max pooling and dropout raised to 0.3 throughout: four
    Conv->BN->AvgPool->Dropout(0.3) stages, then Flatten, two
    Dense->BN->Dropout(0.3) stages, and a 7-way softmax head.

    Returns:
        A compiled ``keras.Sequential`` model.
    """
    model = models.Sequential()
    # Convolutional stages: (filters, kernel size); stride 1, 'same' padding, ReLU
    for stage, (n_filters, k) in enumerate(((64, 3), (128, 5), (512, 3), (512, 3))):
        conv_kwargs = dict(kernel_size=k, strides=1, padding='same', activation='relu')
        if stage == 0:
            # only the first layer declares the input shape (taken from the training data)
            conv_kwargs['input_shape'] = x_train.shape[1:]
        model.add(layers.Conv2D(n_filters, **conv_kwargs))
        model.add(layers.BatchNormalization())
        model.add(layers.AveragePooling2D((2, 2)))
        model.add(layers.Dropout(0.3))
    model.add(layers.Flatten())
    # Fully connected stages: Dense -> BN -> Dropout(0.3)
    for n_units in (256, 512):
        model.add(layers.Dense(n_units, activation='relu'))
        model.add(layers.BatchNormalization())
        model.add(layers.Dropout(0.3))
    # Softmax output over the 7 emotion classes
    model.add(layers.Dense(7, activation='softmax'))
    model.compile(optimizer=optimizers.Adamax(0.0005),
                  loss='categorical_crossentropy', metrics=['accuracy'])
    return model
# Build, visualize, train (30 epochs, batch size 64) and evaluate the Final Combined Model
model = final_combined_model()
plot_model(model, to_file="model.png", show_shapes=True, show_layer_names=True)
model.summary()
history8 = train_model(model, 30, 64)
model.evaluate(x_test, y_test)
plot_model_history(history8, name="Final Combined Model")
# Compare against the Model 2 baseline curves
compare_model_history(history2, history8, name="Model 2 vs Final Combined Model", key1="Model 2", key2="Final Combined Model")
# Data augmentation pipeline for the Final Combined Model experiment
train_datagen = ImageDataGenerator(
    rotation_range=10, # randomly rotating images by up to +/-10 degrees
    zoom_range = 0.1, # zooming into the image by up to 10%
    width_shift_range=0.1, # horizontally shifting the images by up to 10% of the width
    height_shift_range=0.1, # vertically shifting the images by up to 10% of the height
    horizontal_flip=True, # allows horizontal flipping of images
    vertical_flip=True, # NOTE(review): vertical flips produce upside-down faces -- confirm this is intended
    fill_mode='nearest') # replaces the empty area with the nearest pixel values
plot_augmentation_samples(train_datagen)
model = final_combined_model()
# 80 epochs, batch size 128, training on the augmented image stream
history9 = train_augmented_model(model, train_datagen, 80, 128)
model.evaluate(x_test, y_test)
plot_model_history(history9, name="Final Combined Model with Data Augmentation")
def final_model(input_shape=None, num_classes=7, learning_rate=0.0005):
    """Build and compile the final CNN used for FER classification.

    Architecture: four Conv->BatchNorm->AvgPool->Dropout stages (dropout
    0.3, except 0.35 after the fourth conv stage), then Flatten, two
    Dense->BatchNorm->Dropout(0.3) stages, and a softmax head.

    Args:
        input_shape: shape of one input image; defaults to the global
            training data's ``x_train.shape[1:]`` as in the original code.
        num_classes: number of output emotion classes (default 7).
        learning_rate: Adamax learning rate (default 5e-4).

    Returns:
        A compiled ``keras.Sequential`` model.
    """
    if input_shape is None:
        # fall back to the notebook-global training tensor shape
        input_shape = x_train.shape[1:]
    model = models.Sequential()
    # Convolutional stages: (filters, kernel size, dropout rate).
    # All convs use stride 1, 'same' padding, ReLU.
    conv_stages = [(64, 3, 0.3), (128, 5, 0.3), (512, 3, 0.3), (512, 3, 0.35)]
    for i, (filters, kernel, drop) in enumerate(conv_stages):
        conv_kwargs = {'kernel_size': kernel, 'strides': 1,
                       'padding': 'same', 'activation': 'relu'}
        if i == 0:
            conv_kwargs['input_shape'] = input_shape  # only the first layer declares the input
        model.add(layers.Conv2D(filters, **conv_kwargs))
        model.add(layers.BatchNormalization())
        model.add(layers.AveragePooling2D((2, 2)))
        model.add(layers.Dropout(drop))
    model.add(layers.Flatten())
    # Fully connected stages: Dense -> BN -> Dropout(0.3)
    for units in (256, 512):
        model.add(layers.Dense(units, activation='relu'))
        model.add(layers.BatchNormalization())
        model.add(layers.Dropout(0.3))
    # Softmax head over the emotion classes
    model.add(layers.Dense(num_classes, activation='softmax'))
    # Adamax with explicitly spelled-out default betas, as in the original
    model.compile(optimizer=optimizers.Adamax(learning_rate=learning_rate,
                                              beta_1=0.9, beta_2=0.999),
                  loss='categorical_crossentropy', metrics=['accuracy'])
    return model
# Build, visualize, train (30 epochs, batch size 64), evaluate and persist the final model
model = final_model()
save_figure("Final Model Architecture")
plot_model(model, to_file="model.png", show_shapes=True, show_layer_names=True)
model.summary()
history10 = train_model(model, 30, 64)
model.evaluate(x_test, y_test)
# save trained weights + architecture to HDF5 for later reloading
model.save('final-model.h5', overwrite=True)
# Milder augmentation for the final model: same shifts/zoom/rotation but no flips
train_datagen = ImageDataGenerator(
    rotation_range=10, # randomly rotating images by up to +/-10 degrees
    zoom_range = 0.1, # zooming into the image by up to 10%
    width_shift_range=0.1, # horizontally shifting the images by up to 10% of the width
    height_shift_range=0.1, # vertically shifting the images by up to 10% of the height
    horizontal_flip=False, # doesn't allow horizontal flipping of images
    vertical_flip=False, # doesn't allow vertical flipping of images
    fill_mode='nearest') # replaces the empty area with the nearest pixel values
plot_augmentation_samples(train_datagen)
model = final_model()
# 80 epochs, batch size 128, training on the augmented image stream
history11 = train_augmented_model(model, train_datagen, 80, 128)
model.evaluate(x_test, y_test)
model.save('augmented-model.h5', overwrite=True)
plot_model_history(history10, name="Final CNN Model Test") # NOTE(review): duplicates the next call on the same history with a different figure name -- likely a leftover; confirm
plot_model_history(history10, name="Final CNN Model")
plot_model_history(history11, name="Final CNN Model with Data Augmentation")
# Evaluate the best saved model on the held-out test set:
# prediction table for the first 50 samples, a 5x10 image grid, a
# classification report, and a confusion-matrix heatmap.
# model = tf.keras.models.load_model('final-model.h5')
model = tf.keras.models.load_model('/content/drive/MyDrive/FER H5/final-model.h5') # saved the best model on google drive
data = []
predicted = np.argmax(model.predict(x_test), axis=1) # class index with the highest softmax score
actual = np.argmax(y_test, axis=1) # one-hot labels back to class indices
validate = []
table = PrettyTable(['Predicted Emotion', 'Actual Emotion', 'Predicted Emotion Text', 'Actual Emotion Text', 'Validate'])
# Single pass: record correctness and add the table row at the same time
# (original built `data` first and filled the table in a second loop).
for i in range(50):
    validate.append('True' if predicted[i] == actual[i] else 'False')
    row = [predicted[i],
           actual[i],
           decoded_emotions[predicted[i]],
           decoded_emotions[actual[i]],
           validate[i]]
    data.append(row)
    table.add_row(row)
print(table)
true_predictions = validate.count('True')
print(f"\nNumber of true predictions: {true_predictions}/{len(validate)}")
# 5x10 grid of test images: green title = correct prediction, red = wrong
fig = plt.figure(figsize=(25, 15))
fig.suptitle("Predictions vs Truth", fontsize=20, weight='bold')
rows = 5
columns = 10
for i in range(rows * columns):
    fig.add_subplot(rows, columns, i + 1)
    plt.grid(False)
    plt.imshow(np.squeeze(x_test[i]), cmap=plt.cm.gray)
    plt.xticks([])
    plt.yticks([])
    # compare class indices directly instead of their decoded labels
    if predicted[i] != actual[i]:
        plt.title("Predicted: " + decoded_emotions[predicted[i]], color='r')
    else:
        plt.title("Predicted: " + decoded_emotions[predicted[i]], color='g')
    plt.xlabel("Actual: " + decoded_emotions[actual[i]], color='b')
save_figure("Predictions vs Truth", tight_layout=False)
plt.show()
print(classification_report(actual, predicted, target_names=decoded_emotions.values()))
# BUGFIX: the original assigned to `confusion_matrix`, shadowing the
# sklearn.metrics.confusion_matrix function imported at the top of the file.
cm = tf.math.confusion_matrix(actual, predicted)
plt.figure(figsize=(10, 8))
sns.heatmap(cm, annot=True,
            cmap='Blues', fmt='d',
            xticklabels=decoded_emotions.values(),
            yticklabels=decoded_emotions.values())
plt.xlabel('Predicted')
plt.ylabel('Actual')
save_figure("Confusion Matrix")
# Evaluate the augmentation-trained model with the same report as the
# final model: 50-row prediction table, 5x10 image grid, classification
# report, and confusion-matrix heatmap.
model = tf.keras.models.load_model('augmented-model.h5')
data = []
predicted = np.argmax(model.predict(x_test), axis=1) # class index with the highest softmax score
actual = np.argmax(y_test, axis=1) # one-hot labels back to class indices
validate = []
table = PrettyTable(['Predicted Emotion', 'Actual Emotion', 'Predicted Emotion Text', 'Actual Emotion Text', 'Validate'])
# Single pass: record correctness and add the table row at the same time
# (original built `data` first and filled the table in a second loop).
for i in range(50):
    validate.append('True' if predicted[i] == actual[i] else 'False')
    row = [predicted[i],
           actual[i],
           decoded_emotions[predicted[i]],
           decoded_emotions[actual[i]],
           validate[i]]
    data.append(row)
    table.add_row(row)
print(table)
true_predictions = validate.count('True')
print(f"\nNumber of true predictions: {true_predictions}/{len(validate)}")
# 5x10 grid of test images: green title = correct prediction, red = wrong
fig = plt.figure(figsize=(25, 15))
fig.suptitle("Predictions vs Truth", fontsize=20, weight='bold')
rows = 5
columns = 10
for i in range(rows * columns):
    fig.add_subplot(rows, columns, i + 1)
    plt.grid(False)
    plt.imshow(np.squeeze(x_test[i]), cmap=plt.cm.gray)
    plt.xticks([])
    plt.yticks([])
    # compare class indices directly instead of their decoded labels
    if predicted[i] != actual[i]:
        plt.title("Predicted: " + decoded_emotions[predicted[i]], color='r')
    else:
        plt.title("Predicted: " + decoded_emotions[predicted[i]], color='g')
    plt.xlabel("Actual: " + decoded_emotions[actual[i]], color='b')
save_figure("Predictions vs Truth", tight_layout=False)
plt.show()
print(classification_report(actual, predicted, target_names=decoded_emotions.values()))
# BUGFIX: the original assigned to `confusion_matrix`, shadowing the
# sklearn.metrics.confusion_matrix function imported at the top of the file.
cm = tf.math.confusion_matrix(actual, predicted)
plt.figure(figsize=(10, 8))
sns.heatmap(cm, annot=True,
            cmap='Blues', fmt='d',
            xticklabels=decoded_emotions.values(),
            yticklabels=decoded_emotions.values())
plt.xlabel('Predicted')
plt.ylabel('Actual')
save_figure("Confusion Matrix")
| Model | Validation Loss | Validation Accuracy | True predictions on first 50 test images | Best Model? |
|---|---|---|---|---|
| Final Model | 0.5459 | 0.8518 | 46/50 | Yes |
| Final Model with Data Augmentation | 0.6261 | 0.7821 | 34/50 | No |
# Export the best model's architecture (configuration only, no weights) as JSON
fer_to_json = model.to_json()
with open("best-model.json", "w") as json_file:
    json_file.write(fer_to_json)